# Apply a large penalty to scientific notation so numbers print in fixed form.
options(scipen = 999)

#### Read in CED data ####
# Load the cleaned CED analysis file and keep only rows whose num_cand is
# not 1 (presumably this removes single-candidate contests -- TODO confirm).
ced <- filter(
  read_csv("Data/analysis/ced-clean-analysis.csv"),
  num_cand != 1
)

#### Read in Senate DIME data ####
# Load the cleaned DIME analysis file; guess_max raises the number of rows
# read_csv inspects when guessing column types.
dime <- read_csv(
  "Data/analysis/dime-clean-analysis.csv",
  guess_max = 40000
)

# Primary sample: rows with a non-missing ppct, party "D" or "R", more than
# one primary opponent, and a ppct that is neither 0 nor 100.
# (ppct is presumably the primary vote percentage -- TODO confirm.)
primary <- dime %>%
  filter(
    !is.na(ppct),
    party %in% c("D", "R"),
    num_prim_opps > 1,
    ppct != 100,
    ppct != 0
  )

# General sample: rows with a non-missing gpct, party "D" or "R", and a gpct
# that is neither 0 nor 100.
# (gpct is presumably the general-election vote percentage -- TODO confirm.)
general <- dime %>%
  filter(
    !is.na(gpct),
    party %in% c("D", "R"),
    gpct != 100,
    gpct != 0
  )

#### Define clustering variables ####
# Give every data set a common `cluster_var` column, copied from that data
# set's own identifier column (raceid, dcp, and id respectively).
ced <- mutate(ced, cluster_var = raceid)

primary <- mutate(primary, cluster_var = dcp)

general <- mutate(general, cluster_var = id)

#### Create full-name variables: the mean of the first- and last-name pronounceability and commonality measures ####
# For each data set, add two full-name columns:
#   full_name_algorithm   = mean of last_algorithm and first_algorithm
#   full_name_commonality = mean of percent_freq and fprop
ced <- mutate(
  ced,
  full_name_algorithm = (last_algorithm + first_algorithm) / 2,
  full_name_commonality = (percent_freq + fprop) / 2
)

primary <- mutate(
  primary,
  full_name_algorithm = (last_algorithm + first_algorithm) / 2,
  full_name_commonality = (percent_freq + fprop) / 2
)

general <- mutate(
  general,
  full_name_algorithm = (last_algorithm + first_algorithm) / 2,
  full_name_commonality = (percent_freq + fprop) / 2
)

#### Define functions ####
# Adjusted R-squared of a fitted model, rounded to four decimal places.
#
# model: a fitted model whose summary() exposes `adj.r.squared`.
# Returns a numeric value.
get_r2 <- function(model) {
  fit_summary <- summary(model)
  round(fit_summary$adj.r.squared, digits = 4)
}

# Number of observations used to fit `model`, as reported by nobs().
get_n <- function(model) {
  nobs(model)
}

# Coefficient test table for `model` computed with a robust (HC0) covariance
# matrix.
#
# model: a fitted model object accepted by vcovHC() and coeftest().
# Returns the object produced by coeftest(); `save = TRUE` is passed through
# to that call.
#
# NOTE(review): if vcovHC() resolves to sandwich::vcovHC on an lm/glm fit,
# `cluster` and `adjust` are not part of its signature and would be silently
# ignored, giving heteroskedasticity-robust but NOT clustered standard
# errors. Confirm which vcovHC is attached; sandwich::vcovCL(model,
# cluster = ~cluster_var, type = "HC0") is the cluster-robust estimator.
get_clusters <- function(model) {
  # Use TRUE rather than T: T is an ordinary variable that can be reassigned.
  robust_vcov <- vcovHC(
    model,
    type = "HC0",
    cluster = "cluster_var",
    adjust = TRUE
  )
  coeftest(model, vcov = robust_vcov, save = TRUE)
}

#### Define Stargazer arguments ####
# Significance cutoffs and the star symbol paired with each one.
star.cutoffs <- c(.05, .01, .001)
star.char <- c("*", "**", "***")

# Table note, column headings, reported statistics, and table style.
notes <- "Standard errors in parentheses and clustered by electoral race"
column.labels <- c("General Elections", "Primary Elections", "Local Elections")
keep.stat <- c("n", "adj.rsq")
style <- "ajps"

# Terms kept in the congressional tables; the full `keep` vector is these
# eleven terms followed by three additional ones.
keep_congress <- c(
  "last_algorithm",
  "first_algorithm",
  "percent_freq",
  "fprop",
  "white",
  "incumbent",
  "female",
  "lchars",
  "fchars",
  "race",
  "num_prim_opps"
)

keep <- c(
  keep_congress,
  "totvotes1000",
  "office",
  "seats_comps"
)

# Display labels, listed in the same order as the corresponding `keep`
# vectors above.
covariate.labels_congress <- c(
  "Surname Pronounceability",
  "First Name Pronounceability",
  "Surname Commonality",
  "First Name Commonality",
  "White",
  "Incumbent",
  "Female",
  "Surname Length",
  "First Name Length",
  "Senate Election",
  "Number of Primary Opponents"
)

covariate.labels <- c(
  covariate.labels_congress,
  "Total Election Votes/1000",
  "School Board Election",
  "Seats/Competitors"
)